# Import required R libraries
library(fpp3)
Figure 9.32 shows the ACFs for 36 random numbers, 360 random numbers and 1,000 random numbers.
Explain the differences among these figures. Do they all indicate that the data are white noise?
Answer: XXX
Why are the critical values at different distances from the mean of zero? Why are the autocorrelations different in each figure when they each refer to white noise?
Answer: XXX
A classic example of a non-stationary series are stock prices. Plot the daily closing prices for Amazon stock (contained in gafa_stock), along with the ACF and PACF. Explain how each plot shows that the series is non-stationary and should be differenced.
gafa_stock %>%
filter(Symbol == 'AMZN') %>%
autoplot(Close) +
labs(title = "Amazon Daily Closing Stock Price",
y = "Price")
gafa_stock %>%
filter(Symbol == "AMZN") %>%
ACF(Close) %>%
autoplot()
gafa_stock %>%
filter(Symbol == "AMZN") %>%
PACF(Close) %>%
autoplot()
For the following series, find an appropriate Box-Cox transformation and order of differencing in order to obtain stationary data.
Turkish GDP from global_economy.
turkey_gdp <- global_economy %>%
filter(Country == 'Turkey')
lambda <- turkey_gdp %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
turkey_gdp %>%
autoplot(box_cox(GDP, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed Turkish GDP with $\\lambda$ = ",
round(lambda,2))))
Appears to have an increasing trend after Box-Cox transformation. Attempt first differencing.
turkey_gdp <- turkey_gdp %>%
mutate(diff_bc_gdp = difference(box_cox(GDP, lambda)))
# Display plot of first difference
turkey_gdp %>%
autoplot(diff_bc_gdp) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed Turkish GDP with $\\lambda$ = ",
round(lambda,2))))
# Apply Ljung-Box test
turkey_gdp %>%
features(diff_bc_gdp, ljung_box, lag = 10)
## # A tibble: 1 × 3
## Country lb_stat lb_pvalue
## <fct> <dbl> <dbl>
## 1 Turkey 5.84 0.829
Accommodation takings in the state of Tasmania from aus_accommodation.
Quarterly data
tas_takings <- aus_accommodation %>%
filter(State == 'Tasmania')
lambda <- tas_takings %>%
features(Takings, features = guerrero) %>%
pull(lambda_guerrero)
tas_takings %>% autoplot(box_cox(Takings, lambda)) + labs(y = "“, title = latex2exp::TeX(paste0(”Transformed gas production with \(\\lambda\) = ", round(lambda,2))))
tas_takings %>%
transmute(
`Takings` = Takings,
`Box-Cox Takings` = box_cox(Takings, lambda),
`Annual change in Box-Cox Takings` = difference(box_cox(Takings, lambda), 4),
`Doubly differenced Takings` =
difference(difference(box_cox(Takings, lambda), 4), 1)
) %>%
pivot_longer(-Date, names_to="Type", values_to="Takings") %>%
mutate(
Type = factor(Type, levels = c(
"Takings",
"Box-Cox Takings",
"Annual change in Box-Cox Takings",
"Doubly differenced Takings"))
) %>%
ggplot(aes(x = Date, y = Takings)) +
geom_line() +
facet_grid(vars(Type), scales = "free_y") +
labs(title = "Tasmanian Accomodation Takings", y = NULL)
Definitely appears to have a seasonal component.
Appears the doubly differencing is necessary.
tas_takings %>%
mutate(box_cox_takings = box_cox(Takings, lambda)) %>%
features(box_cox_takings, unitroot_nsdiffs)
## # A tibble: 1 × 2
## State nsdiffs
## <chr> <int>
## 1 Tasmania 1
# Result is ...
#> # A tibble: 1 x 1
#> nsdiffs
#> <int>
#> 1 1
tas_takings %>%
mutate(box_cox_takings = difference(box_cox(Takings, lambda), 4)) %>%
features(box_cox_takings, unitroot_ndiffs)
## # A tibble: 1 × 2
## State ndiffs
## <chr> <int>
## 1 Tasmania 0
# Result is ...
#> # A tibble: 1 x 1
#> ndiffs
#> <int>
#> 1 0
# DO I REALLY WANT TO KEEP THIS?
# Apply Ljung-Box test
tas_takings %>%
features(difference(difference(box_cox(Takings, lambda), 4), 1), ljung_box, lag = 12)
## # A tibble: 1 × 3
## State lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 Tasmania 29.0 0.00398
## NOT THE RESULT I WANT, DOUBLE-CHECK ##
## unitroot_nsdiffs() from section 9.1 instead of ljung-box test
tas_takings %>%
ACF(difference(difference(box_cox(Takings, lambda), 4), 1)) %>%
autoplot()
Monthly sales from souvenirs.
lambda <- souvenirs %>%
features(Sales, features = guerrero) %>%
pull(lambda_guerrero)
souvenirs %>%
autoplot(box_cox(Sales, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed gas production with $\\lambda$ = ",
round(lambda,2))))
souvenirs %>%
transmute(
`Sales` = Sales,
`Box-Cox sales` = box_cox(Sales, lambda),
`Annual change in Box-Cox sales` = difference(box_cox(Sales, lambda), 12),
`Doubly differenced Box-Cox sales` =
difference(difference(box_cox(Sales, lambda), 12), 1)
) %>%
pivot_longer(-Month, names_to="Type", values_to="Sales") %>%
mutate(
Type = factor(Type, levels = c(
"Sales",
"Box-Cox sales",
"Annual change in Box-Cox sales",
"Doubly differenced Box-Cox sales"))
) %>%
ggplot(aes(x = Month, y = Sales)) +
geom_line() +
facet_grid(vars(Type), scales = "free_y") +
labs(title = "Souvenirs Sales", y = NULL)
Definitely appears to have a seasonal component.
Appears the double differencing is needed.
souvenirs %>%
mutate(box_cox_sales = box_cox(Sales, lambda)) %>%
features(box_cox_sales, unitroot_nsdiffs)
## # A tibble: 1 × 1
## nsdiffs
## <int>
## 1 1
# Result is ....
#> # A tibble: 1 x 1
#> nsdiffs
#> <int>
#> 1 1
# Now try differencing on the seasonal difference
souvenirs %>%
mutate(box_cox_sales = difference(box_cox(Sales, lambda), 12)) %>%
features(box_cox_sales, unitroot_ndiffs)
## # A tibble: 1 × 1
## ndiffs
## <int>
## 1 0
# Result is ...
#> # A tibble: 1 x 1
#> ndiffs
#> <int>
#> 1 0
# DO I REALLY WANT THIS??
# Apply Ljung-Box test
souvenirs %>%
features(difference(difference(box_cox(Sales, lambda), 12), 1), ljung_box, lag = 12)
## # A tibble: 1 × 2
## lb_stat lb_pvalue
## <dbl> <dbl>
## 1 26.2 0.0102
## NOT THE RESULT I WANT, DOUBLE-CHECK ##
## unitroot_nsdiffs() from section 9.1 instead of ljung-box test
For your retail data (from Exercise 8 in Section 2.10), find the appropriate order of differencing (after transformation if necessary) to obtain stationary data.
set.seed(8675309)
# Montly data
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>%
autoplot(Turnover)
head(myseries)
## # A tsibble: 6 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 Queensland Takeaway food services A3349718A 1982 Apr 26.7
## 2 Queensland Takeaway food services A3349718A 1982 May 27.3
## 3 Queensland Takeaway food services A3349718A 1982 Jun 26.2
## 4 Queensland Takeaway food services A3349718A 1982 Jul 25.2
## 5 Queensland Takeaway food services A3349718A 1982 Aug 25.6
## 6 Queensland Takeaway food services A3349718A 1982 Sep 26.7
# Box Cox
lambda <- myseries %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
myseries %>%
autoplot(box_cox(Turnover, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed retail tunrover with $\\lambda$ = ",
round(lambda,2))))
myseries %>%
transmute(
`Turnover` = Turnover,
`Box-Cox turnover` = box_cox(Turnover, lambda),
`Annual change in Box-Cox turnover` = difference(box_cox(Turnover, lambda), 12),
`Doubly differenced Box-Cox turnover` =
difference(difference(box_cox(Turnover, lambda), 12), 1)
) %>%
pivot_longer(-Month, names_to="Type", values_to="Turnover") %>%
mutate(
Type = factor(Type, levels = c(
"Turnover",
"Box-Cox turnover",
"Annual change in Box-Cox turnover",
"Doubly differenced Box-Cox turnover"))
) %>%
ggplot(aes(x = Month, y = Turnover)) +
geom_line() +
facet_grid(vars(Type), scales = "free_y") +
labs(title = "Retail Turnover", y = NULL)
# nsdiff
myseries %>%
mutate(box_cox_turnover = box_cox(Turnover, lambda)) %>%
features(box_cox_turnover, unitroot_nsdiffs)
## # A tibble: 1 × 3
## State Industry nsdiffs
## <chr> <chr> <int>
## 1 Queensland Takeaway food services 1
# Result is ....
#> # A tibble: 1 x 1
#> nsdiffs
#> <int>
#> 1 1
# ndiff
# Now try differencing on the seasonal difference
myseries %>%
mutate(box_cox_turnover = difference(box_cox(Turnover, lambda), 12)) %>%
features(box_cox_turnover, unitroot_ndiffs)
## # A tibble: 1 × 3
## State Industry ndiffs
## <chr> <chr> <int>
## 1 Queensland Takeaway food services 1
# Result is ... also 1
#> # A tibble: 1 x 1
#> ndiffs
#> <int>
#> 1 1
Simulate and plot some data from simple ARIMA models.
Use the following R code to generate data from an AR(1) model with
\(\phi_1 = 0.6\) and \(\sigma^2 = 1\). The process starts with \(y_1 = 0\).
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
Produce a time plot for the series. How does the plot change as you change \(\phi_1\)?
sim %>% autoplot(y)
# phi=0.2
for(i in 2:100)
y[i] <- 0.2*y[i-1] + e[i]
sim02 <- tsibble(idx = seq_len(100), y = y, index = idx)
# phi=1.0
for(i in 2:100)
y[i] <- 1.0*y[i-1] + e[i]
sim10 <- tsibble(idx = seq_len(100), y = y, index = idx)
sim02 %>% autoplot(y)
sim10 %>% autoplot(y)
Lower the \(\phi_1\), the more oscillations occur, the higher the fewer oscillations
Write your own code to generate data from an MA(1) model with \(\theta_1 = 0.6\) and \(\sigma^2 = 1\).
\[MA(1)\ is\ y_t = c + \epsilon_t + \theta_1\epsilon_{t-1}\]
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- e[i] + 0.6*e[i-1]
sim_ma1 <- tsibble(idx = seq_len(100), y = y, index = idx)
#head(sim_ma1)
Produce a time plot for the series. How does the plot change as you change \(\theta_1\)?
sim_ma1 %>% autoplot(y)
# theta is 0.2
for(i in 2:100)
y[i] <- e[i] + 0.2*e[i-1]
sim_ma02 <- tsibble(idx = seq_len(100), y = y, index = idx)
# theta is 1.0
for(i in 2:100)
y[i] <- e[i] + 1.0*e[i-1]
sim_ma10 <- tsibble(idx = seq_len(100), y = y, index = idx)
sim_ma02 %>% autoplot(y)
sim_ma10 %>% autoplot(y)
Generate data from an ARMA(1,1) model with \(\phi_1 = 0.6,\) \(\theta_1 = 0.6\) and \(\sigma^2 = 1\).
y <- numeric(100)
e <- rnorm(100)
phi <- 0.6
theta <- 0.6
for(i in 2:100)
y[i] <- phi*y[i-1] + theta*e[i-1] + e[i]
sim_arma11 <- tsibble(idx = seq_len(100), y = y, index = idx)
Generate data from an AR(2) model with \(\phi_1 = -0.8\), \(\phi_2 = 0.3\) and \(\sigma^2 = 1\). (Note that these parameters will give a non-stationary series.)
\[AR(2)\ is \ y_t = c + \phi_1y_{t-1} + \phi_2y_{t-2} + \epsilon_t\]
y <- numeric(100)
e <- rnorm(100)
for(i in 3:100)
y[i] <- -0.8*y[i-1] + 0.3*y[i-2] + e[i]
sim_ar2 <- tsibble(idx = seq_len(100), y = y, index = idx)
Graph the latter two series and compare them.
sim_arma11 %>% autoplot(y)
sim_ar2 %>% autoplot(y)
Consider aus_airpassengers, the total number of passengers (in millions) from Australian air carriers for the period 1970-2011.
aus_airpassengers %>% head()
## # A tsibble: 6 x 2 [1Y]
## Year Passengers
## <dbl> <dbl>
## 1 1970 7.32
## 2 1971 7.33
## 3 1972 7.80
## 4 1973 9.38
## 5 1974 10.7
## 6 1975 11.1
# Year Passengers
Use ARIMA() to find an appropriate ARIMA model. What model was selected. Check that the residuals look like white noise. Plot forecasts for the next 10 periods.
# Generate ARIMA model
aus_air_mod <- aus_airpassengers %>% model(ARIMA(Passengers))
Display model definition
# Output report to show model selected
report(aus_air_mod)
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8963
## s.e. 0.0594
##
## sigma^2 estimated as 4.308: log likelihood=-97.02
## AIC=198.04 AICc=198.32 BIC=201.65
Check the residuals look like white noise
aus_air_mod %>% gg_tsresiduals()
Plot forecasts for the next 10 periods
aus_air_fc <- aus_air_mod %>% forecast(h=10)
aus_air_fc
## # A fable: 10 x 4 [1Y]
## # Key: .model [1]
## .model Year Passengers .mean
## <chr> <dbl> <dist> <dbl>
## 1 ARIMA(Passengers) 2017 N(75, 4.3) 74.8
## 2 ARIMA(Passengers) 2018 N(77, 9.6) 77.0
## 3 ARIMA(Passengers) 2019 N(79, 16) 79.2
## 4 ARIMA(Passengers) 2020 N(81, 23) 81.3
## 5 ARIMA(Passengers) 2021 N(84, 32) 83.5
## 6 ARIMA(Passengers) 2022 N(86, 42) 85.7
## 7 ARIMA(Passengers) 2023 N(88, 53) 87.9
## 8 ARIMA(Passengers) 2024 N(90, 66) 90.1
## 9 ARIMA(Passengers) 2025 N(92, 80) 92.3
## 10 ARIMA(Passengers) 2026 N(94, 97) 94.5
Write the model in terms of the backshift operator.
Model: ARIMA(0,2,1)
\[(1-B)^2y_t = c + (1+\theta_1B)\epsilon_t\]
Random work, probably delete \[B(By_t) = B^2y_t = y_{t-2}\]
Plot forecasts from an ARIMA(0,1,0) model with drift and compare these to part a.
# Apparently drift just gets applied
aus_air_arima010 <- aus_airpassengers %>%
model(ARIMA(Passengers ~ pdq(0,1,0)))
report(aus_air_arima010)
## Series: Passengers
## Model: ARIMA(0,1,0) w/ drift
##
## Coefficients:
## constant
## 1.4191
## s.e. 0.3014
##
## sigma^2 estimated as 4.271: log likelihood=-98.16
## AIC=200.31 AICc=200.59 BIC=203.97
Plot forecasts from an ARIMA(2,1,2) model with drift and compare these to parts a and c. Remove the constant and see what happens.
aus_air_arima212 <- aus_airpassengers %>%
model(ARIMA(Passengers ~ pdq(1,1,2)))
report(aus_air_arima212)
## Series: Passengers
## Model: ARIMA(1,1,2) w/ drift
##
## Coefficients:
## ar1 ma1 ma2 constant
## 0.9240 -0.9828 0.1323 0.1083
## s.e. 0.1155 0.1920 0.1479 0.0384
##
## sigma^2 estimated as 4.388: log likelihood=-97.28
## AIC=204.57 AICc=206.07 BIC=213.71
# 212 produces NULL model
# 211 works though
# 112 works, too
Plot forecasts from an ARIMA(0,2,1) model with a constant. What happens?
aus_air_arima021 <- aus_airpassengers %>%
model(ARIMA(Passengers ~ pdq(0,2,1)))
report(aus_air_arima021)
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8963
## s.e. 0.0594
##
## sigma^2 estimated as 4.308: log likelihood=-97.02
## AIC=198.04 AICc=198.32 BIC=201.65
For the United States GDP series (from global_economy):
us_gdp <- global_economy %>%
filter(Country == "United States") %>%
mutate(GDP = GDP/1e9) # GDP in billions
head(us_gdp)
## # A tsibble: 6 x 9 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 United States USA 1960 543. NA 13.6 4.20 4.97 180671000
## 2 United States USA 1961 563. 2.30 13.7 4.03 4.90 183691000
## 3 United States USA 1962 605. 6.10 13.9 4.13 4.81 186538000
## 4 United States USA 1963 639. 4.40 14.0 4.09 4.87 189242000
## 5 United States USA 1964 686. 5.80 14.2 4.10 5.10 191889000
## 6 United States USA 1965 744. 6.40 14.4 4.24 4.99 194303000
us_gdp %>% autoplot(GDP)
if necessary, find a suitable Box-Cox transformation for the data;
# Box Cox
lambda <- us_gdp %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
us_gdp %>%
autoplot(box_cox(GDP, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed US GDP with $\\lambda$ = ",
round(lambda,2))))
us_gdp <- us_gdp %>%
mutate(GDP_T = box_cox(GDP, lambda))
us_gdp <- us_gdp %>%
mutate(Diff = difference(GDP_T))
head(us_gdp)
## # A tsibble: 6 x 11 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population GDP_T Diff
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 United… USA 1960 543. NA 13.6 4.20 4.97 180671000 17.4 NA
## 2 United… USA 1961 563. 2.30 13.7 4.03 4.90 183691000 17.6 0.215
## 3 United… USA 1962 605. 6.10 13.9 4.13 4.81 186538000 18.0 0.431
## 4 United… USA 1963 639. 4.40 14.0 4.09 4.87 189242000 18.4 0.330
## 5 United… USA 1964 686. 5.80 14.2 4.10 5.10 191889000 18.8 0.445
## 6 United… USA 1965 744. 6.40 14.4 4.24 4.99 194303000 19.3 0.517
us_gdp %>%
ACF(Diff) %>%
autoplot()
us_gdp %>%
PACF(Diff) %>%
autoplot()
Results in \(\lambda = 0.28\).
fit a suitable ARIMA model to the transformed data using ARIMA();
us_gdp_mod <- us_gdp %>% model(ARIMA(GDP_T))
report(us_gdp_mod)
## Series: GDP_T
## Model: ARIMA(1,1,0) w/ drift
##
## Coefficients:
## ar1 constant
## 0.4586 0.3428
## s.e. 0.1198 0.0276
##
## sigma^2 estimated as 0.0461: log likelihood=7.72
## AIC=-9.43 AICc=-8.98 BIC=-3.3
try some other plausible models by experimenting with the orders chosen;
us_gdp_mod111 <- us_gdp %>%
model(ARIMA(GDP_T ~ pdq(1,1,1)))
us_gdp_mod211 <- us_gdp %>%
model(ARIMA(GDP_T ~ pdq(2,1,1)))
us_gdp_mod112 <- us_gdp %>%
model(ARIMA(GDP_T ~ pdq(1,1,2)))
us_gdp_mod101 <- us_gdp %>%
model(ARIMA(GDP_T ~ pdq(1,0,1)))
choose what you think is the best model and check the residual diagnostics;
produce forecasts of your fitted model. Do the forecasts look reasonable?
compare the results with what you would obtain using ETS() (with no transformation).